#==========================================================================
#
#  R code to create figures for the coalition paper
#
#       Daina Chiba (dchiba@essex.ac.uk)
#       Jesse C. Johnson (j.johnson@uky.edu)
#
#       Last modified: 2018-06-22
#==========================================================================

rm(list=ls()) # Delete any objects in the memory

# packages ----------------------------------------------------------------

library(ggplot2)
library(readstata13) # for read.dta13
library(grid)    # for unit()
library(MASS)    # for mvrnorm()
library(VGAM)    # for pbinorm()
# library(corpcor) # for make.positive.definite

# Load data ---------------------------------------------------------------

# Crisis-phase-level data
cris.p <- read.dta13("coalition_crisis_phase.dta")

# Crisis-level data
cris0 <- read.dta13("coalition_crisislevel.dta")
cris0 $ duration <- cris0 $ maxterm - cris0 $ mintrig


# Non-TVC version (i.e., retain the last phase per crisis)

cris <- cris.p[cris.p $ failure == 1, ]
cris $ coalition.max <- as.numeric(by(cris.p $ coalition_phase, cris.p $ crisno, max))
cris $ duration <- cris $ "_t"

summary(cris $ duration)
summary(as.numeric(cris0 $ duration))
summary(cris.p $ duration_crisis)


# Descriptive statistics --------------------------------------------------

## Difference of means: x = coalition, y = duration
summary(cris $ duration)
t.test(cris $ duration ~ cris $ coalition.max)

table(cris $ coalition.max)
by(cris $ duration, cris $ coalition.max, mean)
by(cris $ duration, cris $ coalition.max, summary)

# Color palette
cbgFillPalette <- scale_fill_manual(values=c("gray90", "gray20"))

# Figures

cris $ coal.fac <- "With coalition"
cris $ coal.fac[cris $ coalition.max == 0] <- "Without coalition"

cris.plot <- cris[c("duration", "coal.fac")]
cris.plot.pool <- data.frame(duration = cris $ duration, coal.fac = "Pooled")
cris.plot <- rbind(cris.plot, cris.plot.pool)

cris.plot $ coal.fac <- factor(cris.plot $ coal.fac, levels = c("Pooled", "Without coalition", "With coalition"))
table(cris.plot $ coal.fac)

g <- ggplot(data = cris, aes(duration)) + theme_bw()
g <- g + geom_histogram()
g <- g + facet_grid( coal.fac ~ .) + scale_x_continuous("Crisis duration (days)")
g
# ggsave(file="Figure1_histogram_two.pdf",width=7,height=5)

g <- ggplot(data = cris.plot, aes(duration)) + theme_bw()
g <- g + geom_histogram()
g <- g + facet_grid( coal.fac ~ .) + scale_x_continuous("Crisis duration (days)")
g
ggsave(file="figs/Figure1_histogram.pdf",width=5,height=6)


# Simulate coefficients ---------------------------------------------------

## Load coefficient estimates
beta.m5 <- as.matrix(read.table("estimates/m5_logn_beta.txt", header=T))
beta.m6 <- as.matrix(read.table("estimates/m6_logn_beta.txt", header=T))
beta.m7 <- as.matrix(read.table("estimates/m7_logn_beta.txt", header=T))
beta.m8 <- as.matrix(read.table("estimates/m8_logn_beta.txt", header=T))
beta.m4j <- as.matrix(read.table("estimates/m4j_logn_beta.txt", header=T))
beta.m4 <- as.matrix(read.table("estimates/m4_logn_beta.txt", header=T))
beta.m10 <- as.matrix(read.table("estimates/m10_coalsize_beta.txt", header=T))

## var-cov matrices
vcov.m5 <- as.matrix(read.table("estimates/m5_logn_vcov.txt", header=T))
vcov.m6 <- as.matrix(read.table("estimates/m6_logn_vcov.txt", header=T))
vcov.m7 <- as.matrix(read.table("estimates/m7_logn_vcov.txt", header=T))
vcov.m8 <- as.matrix(read.table("estimates/m8_logn_vcov.txt", header=T))
vcov.m4j <- as.matrix(read.table("estimates/m4j_logn_vcov.txt", header=T))
vcov.m4 <- as.matrix(read.table("estimates/m4_logn_vcov.txt", header=T))
vcov.m10 <- as.matrix(read.table("estimates/m10_coalsize_vcov.txt", header=T))

## Draw simulated beta from MVN(b,V)
nsims <- 1000   ## Number of resampling repetitions
set.seed(123456789)
simb.m5 <- mvrnorm(n = nsims, mu = beta.m5, Sigma = vcov.m5)
simb.m6 <- mvrnorm(n = nsims, mu = beta.m6, Sigma = vcov.m6)
simb.m7 <- mvrnorm(n = nsims, mu = beta.m7, Sigma = vcov.m7)
simb.m8 <- mvrnorm(n = nsims, mu = beta.m8, Sigma = vcov.m8)
simb.m4j <- mvrnorm(n = nsims, mu = beta.m4j, Sigma = vcov.m4j)
simb.m4 <- mvrnorm(n = nsims, mu = beta.m4, Sigma = vcov.m4)
simb.m10 <- mvrnorm(n = nsims, mu = beta.m10, Sigma = vcov.m10)


# Calculate substantive effects -------------------------------------------

# Median x values
med.cinc <- median(cris.p $ cinc_disparity)
med.cinc.pcp <- median(cris.p $ cinc_disparity_pcp)
med.glob <- median(cris.p $ globorg_dum)
med.ne <- median(cris.p $ neighbors_total_max)
med.nl <- median(cris.p $ neighbors_land_max)

# Mean FEs
meanFE <- rep(NA, 30)
for (i in 1:30){
  meanFE[i] <- mean(eval(parse(text=paste(as.character("cris.p"), "$", as.character("pcdum"), i, sep=""))))
}

# covariates for the first stage
covs.coal.m5    <- cbind(med.glob, med.cinc.pcp, med.ne, 1) # for land+sea (model 5)
covs.coal.m6 <- cbind(med.glob, med.cinc.pcp, med.nl, 1) # for land contiguity only (model 6)

# covariates for duration: FE set equal to non-protracted conflict
covsfe.0 <- t(c(0, med.glob, med.cinc, 1, rep(0,29)))
covsfe.1 <- t(c(1, med.glob, med.cinc, 1, rep(0,29)))

# covariates for duration: mean FE
covs.mfe.0 <- t(c(0, med.glob, med.cinc, meanFE))
covs.mfe.1 <- t(c(1, med.glob, med.cinc, meanFE))


# Function to calculate quantities of interests for univaraite models
qoi.univ <- function(beta, t.max = 3650, 
                     covsfe.0. = covsfe.0, covsfe.1. = covsfe.1){
  k.d <- 1:33; k.s <- 34
  # auxiliary parameters
  sig <- exp(beta[, k.s])
  # survival function
  time <- seq(from=1, to=t.max, by = 1)
  St.0 <- St.1 <- matrix(NA, nrow=length(time), ncol = nsims)
  for(i in 1:length(time)){
    St.1[i, ] <- pmin(1-1e-8, pmax(1e-8, 1 - pnorm((log(time[i]) - covsfe.1. %*% t(beta[,k.d]))/sig)  ))
    St.0[i, ] <- pmin(1-1e-8, pmax(1e-8, 1 - pnorm((log(time[i]) - covsfe.0. %*% t(beta[,k.d]))/sig)  ))
  }
  # Integrate above to calculate e(T)
  et.1 <- apply(St.1, 2, sum)
  et.0 <- apply(St.0, 2, sum)
  return(list(cond.surv.1=St.1, cond.surv.0=St.0, et.1=et.1, et.0=et.0))
}

## Expected duration from model 4 (univariate)
cs.m4 <- qoi.univ(beta = simb.m4)
mean(cs.m4$et.0); mean(cs.m4$et.1)

# Function to calculate quantities of interests

qoi <- function(beta, t.max = 3650, 
                covs.coal = covs.coal.m5,
                covsfe.0. = covsfe.0, covsfe.1. = covsfe.1){
  k.c <- 1:4; k.d <- 5:37; k.s <- 38; k.r <- 39
  # Pr( coal = 1 ) and Pr( coal = 0 )
  xg <- covs.coal %*% t(beta[, k.c])
  pr.c1 <- pnorm(xg)
  pr.c0 <- 1 - pnorm(xg)
  # auxiliary parameters
  rho <- (exp(2 * beta[,k.r])-1)/(exp(2 * beta[,k.r])+1)
  sig <- exp(beta[, k.s])
  # survival function
  time <- seq(from=1, to=t.max, by = 1)
  St.0 <- St.1 <- matrix(NA, nrow=length(time), ncol = nsims)
  for(i in 1:length(time)){
    St.1[i, ] <- pmin(1-1e-8, pmax(1e-8, 1 - pnorm((log(time[i]) - covsfe.1. %*% t(beta[,k.d]))/sig)  ))
    St.0[i, ] <- pmin(1-1e-8, pmax(1e-8, 1 - pnorm((log(time[i]) - covsfe.0. %*% t(beta[,k.d]))/sig)  ))
  }
  # Pr(c=1/0 and t > T)
  cond.surv.1 <- cond.surv.0 <- matrix(NA, nrow = t.max, ncol = nsims)
  for(i in 1:t.max){
    cond.surv.1[i,] <- pbinorm(xg, qnorm(St.1[i,]), cov12 = rho )/pr.c1
    cond.surv.0[i,] <- pbinorm(xg, qnorm(St.0[i,]), cov12 = rho )/pr.c1 # counterfactual
  }
  # Integrate above to calculate e(T)
  et.1 <- apply(cond.surv.1, 2, sum)
  et.0 <- apply(cond.surv.0, 2, sum)
  return(list(cond.surv.1=cond.surv.1, cond.surv.0=cond.surv.0, et.1=et.1, et.0=et.0))
}


## Expected duration (model 4 obtained by estimating the joint model with rho=0 assumed)
# We do this to check if the results we obtain from Model 4 (univariate) are roughly 
# the same as the results we obtain from Model 4j (joint model with rho=0).
cs.m4j <- qoi(beta = simb.m4j)
mean(cs.m4j$et.0); mean(cs.m4j$et.1)

mean(cs.m4$et.0); mean(cs.m4$et.1)

## Expected duration (model 5)
cs.m5 <- qoi(beta = simb.m5)
mean(cs.m5$et.0); mean(cs.m5$et.1)

## Expected duration (model 6)
cs.m6 <- qoi(beta = simb.m6, covs.coal = covs.coal.m6)
mean(cs.m6$et.0); mean(cs.m6$et.1)

## Expected duration (model 7)
cs.m7 <- qoi(beta = simb.m7)
mean(cs.m7$et.0); mean(cs.m7$et.1)

## Expected duration (model 8)
cs.m8 <- qoi(beta = simb.m8)
mean(cs.m8$et.0); mean(cs.m8$et.1)


## Function to calculate tailored confidence interval

tci <- function(et.1, et.0){
  plev <- seq(from = 2.5, to = 15, by = 0.05)
  ediff <- et.1 - et.0
  tbl <- matrix(NA, ncol = 8, nrow = length(plev))
  for(i in 1:length(plev)){
    tbl[i,1] <- quantile(ediff, probs = c(plev[i]/100))
    tbl[i,2] <- tbl[i,4] <- plev[i]
    tbl[i,6] <- 100 - plev[i]
    tbl[i,8] <- 100 - plev[i] * 2
    tbl[i,3] <- quantile(et.1, probs = c(plev[i]/100))
    tbl[i,5] <- quantile(et.0, probs = c(1-plev[i]/100))
    tbl[i,7] <- tbl[i,3] - tbl[i,5]
  }
  # Choose the TCI level
  tci.lev <- tbl[ tbl[,7] == max(tbl[tbl[,7] < tbl[1,1],7]), 2]
  return(tci.lev)
}


# Function to print the effects
print.eff <- function(qoi){
  tci <- tci(et.1 = qoi $ et.1, et.0 = qoi $ et.0)
  print("Expected duration | coal=0")
  print(mean(qoi $ et.0))
  print(quantile(qoi $ et.0, probs = c(tci/100, 1-tci/100)))
  print("Expected duration | coal=1")
  print(mean(qoi $ et.1))
  print(quantile(qoi $ et.1, probs = c(tci/100, 1-tci/100)))
  eff <- qoi$et.1 - qoi$et.0
  print("Estimated effect of coalition on the treated")
  print(mean(eff))
  print(quantile(eff, probs = c(.025, .5, .975)))
  p.inc <- 100 * eff/qoi$et.0
  print("Percentage increase")
  print(mean(p.inc))
  print(quantile(p.inc, probs = c(.025, .5, .975)))
}


## Expected duration with TCI

# Model 4 (univariate)
print.eff(cs.m4)

# Model 4 (joint, but assuming independence, i.e., rho=0)
print.eff(cs.m4j)

# Model 5
print.eff(cs.m5)

# Model 6
print.eff(cs.m6)

# Model 7
print.eff(cs.m7)

# Model 8
print.eff(cs.m8)

# Print output in a file
sink(file = "estimates/effects.txt")
print("Model 4 (univariate)"); print.eff(cs.m4); print("")
print("Model 4 (joint, but assuming independence, i.e., rho=0)"); print.eff(cs.m4j); print("")
print("Model 5"); print.eff(cs.m5); print("")
print("Model 6"); print.eff(cs.m6); print("")
print("Model 7"); print.eff(cs.m7); print("")
print("Model 8"); print.eff(cs.m8); print("")
sink()


# Plot substantive effects ------------------------------------------------

plot.mat.0.m4 <- data.frame(et = cs.m4$et.0, value = "without coalition")
plot.mat.1.m4 <- data.frame(et = cs.m4$et.1, value = "with coalition")
plot.mat.m4 <- rbind(plot.mat.0.m4, plot.mat.1.m4)
plot.mat.0.m4j <- data.frame(et = cs.m4j$et.0, value = "without coalition")
plot.mat.1.m4j <- data.frame(et = cs.m4j$et.1, value = "with coalition")
plot.mat.m4j <- rbind(plot.mat.0.m4j, plot.mat.1.m4j)
plot.mat.0.m5 <- data.frame(et = cs.m5$et.0, value = "without coalition")
plot.mat.1.m5 <- data.frame(et = cs.m5$et.1, value = "with coalition")
plot.mat.m5 <- rbind(plot.mat.0.m5, plot.mat.1.m5)

# Figure 2 (based on model 4: univariate)
print.eff(cs.m4)

y.h.0 <- 0.001; y.h.1 <- 0.001
g <- ggplot(data = plot.mat.m4, aes(et, fill = value))
g <- g + geom_density(alpha = 0.8) 
g <- g + cbgFillPalette + theme_bw()
g <- g + scale_x_continuous("Predicted crisis duration (days)", limit=c(0,550),expand=c(0,0))
g <- g + scale_y_continuous("Density",limits=c(0,0.03))
g <- g + theme(legend.position = c(.8,.8), 
               legend.background = element_rect(fill="white",colour="black"),
               legend.title = element_text(size=12), 
               legend.text = element_text(size = 12),
               legend.key.width = unit(1,"cm"), 
               axis.ticks = element_blank(), axis.line=element_line(colour=NA), 
               axis.title.y = element_text(size=12,angle=90), axis.title.x = element_text(size=12,angle=0),
               axis.text.y = element_text(size=11), axis.text.x = element_text(size=11))
g <- g + labs(fill = "Crisis duration")
g <- g + geom_text(label = "mean = 128", x = 128, y = 0.028)
g <- g + geom_text(label = "mean = 384", x = 384, y = 0.008)
print(g)
ggsave(file="figs/Figure2_m4.pdf",width=8,height=5)

# Figure 2 (based on model 4: joint but independent)
print.eff(cs.m4j)

y.h.0 <- 0.001; y.h.1 <- 0.001
g <- ggplot(data = plot.mat.m4j, aes(et, fill = value))
g <- g + geom_density(alpha = 0.8) 
g <- g + cbgFillPalette + theme_bw()
g <- g + scale_x_continuous("Predicted crisis duration (days)", limit=c(0,550),expand=c(0,0))
g <- g + scale_y_continuous("Density",limits=c(0,0.03))
g <- g + theme(legend.position = c(.8,.8), 
               legend.background = element_rect(fill="white",colour="black"),
               legend.title = element_text(size=12), 
               legend.text = element_text(size = 12),
               legend.key.width = unit(1,"cm"), 
               axis.ticks = element_blank(), axis.line=element_line(colour=NA), 
               axis.title.y = element_text(size=12,angle=90), axis.title.x = element_text(size=12,angle=0),
               axis.text.y = element_text(size=11), axis.text.x = element_text(size=11))
g <- g + labs(fill = "Crisis duration")
g <- g + geom_text(label = "mean = 128", x = 128, y = 0.028)
g <- g + geom_text(label = "mean = 383", x = 383, y = 0.008)
print(g)
ggsave(file="figs/Figure2_m4j.pdf",width=8,height=5)

# Figure 3 (based on model 5)
print.eff(cs.m5)

y.h.0 <- 0.001; y.h.1 <- 0.001
g <- ggplot(data = plot.mat.m5, aes(et, fill = value))
g <- g + geom_density(alpha = 0.8) 
g <- g + cbgFillPalette + theme_bw()
g <- g + scale_x_continuous("Predicted crisis duration (days)", limit=c(0,550),expand=c(0,0))
g <- g + scale_y_continuous("Density",limits=c(0,0.1))
g <- g + theme(legend.position = c(.8,.8), 
               legend.background = element_rect(fill="white",colour="black"),
               legend.title = element_text(size=12), 
               legend.text = element_text(size = 12),
               legend.key.width = unit(1,"cm"), 
               axis.ticks = element_blank(), axis.line=element_line(colour=NA), 
               axis.title.y = element_text(size=12,angle=90), axis.title.x = element_text(size=12,angle=0),
               axis.text.y = element_text(size=11), axis.text.x = element_text(size=11))
g <- g + labs(fill = "Crisis duration")
g <- g + geom_text(label = "mean = 12", x = 50, y = 0.028)
g <- g + geom_text(label = "mean = 296", x = 296, y = 0.01)
print(g)
ggsave(file="figs/Figure3_m5.pdf",width=8,height=5)







# Marginal effects based on model 10 (coalition size) ---------------------

# coalition size model
beta.m10
table(cris $ coalsize)
covs.cs1 <- cbind(log(1), med.glob, med.cinc, 1)
covs.cs2 <- cbind(log(2), med.glob, med.cinc, 1)
covs.cs3 <- cbind(log(3), med.glob, med.cinc, 1)
covs.cs4 <- cbind(log(4), med.glob, med.cinc, 1)
covs.cs5 <- cbind(log(5), med.glob, med.cinc, 1)
covs.cs11 <- cbind(log(11), med.glob, med.cinc, 1)

# Expected median duration for different values of oalition size
med.time.1 <- t(exp( covs.cs1 %*% t(simb.m10[, 1:4])))
med.time.2 <- t(exp( covs.cs2 %*% t(simb.m10[, 1:4])))
med.time.3 <- t(exp( covs.cs3 %*% t(simb.m10[, 1:4])))
med.time.4 <- t(exp( covs.cs4 %*% t(simb.m10[, 1:4])))
med.time.5 <- t(exp( covs.cs5 %*% t(simb.m10[, 1:4])))
med.time.11 <- t(exp( covs.cs11 %*% t(simb.m10[, 1:4])))

mean(med.time.1)
mean(med.time.2)
mean(med.time.3)
mean(med.time.4)
mean(med.time.5)
mean(med.time.11)

# Change in duration
mean(med.time.2) - mean(med.time.1)
mean(med.time.3) - mean(med.time.2)
mean(med.time.4) - mean(med.time.3)
mean(med.time.5) - mean(med.time.4)

# Effect of changing a 2-party coalition to a 3-party coalition
mean(med.time.3 - med.time.2)
apply(X = med.time.3 - med.time.2, MARGIN = 2, FUN = quantile, probs = c(.025, .975))



sink(file = "estimates/effects.txt", append = TRUE)
print("Duration when coalition=1"); print(mean(med.time.1))
print("Duration when coalition=2"); print(mean(med.time.2))
print("Duration when coalition=3"); print(mean(med.time.3))
print("Duration when coalition=4"); print(mean(med.time.4))
print("Duration when coalition=5"); print(mean(med.time.5))
print("Duration when coalition=11"); print(mean(med.time.11))
print("Effect of changing a 2-party coalition to a 3-party coalition")
print(mean(med.time.3 - med.time.2))
print(apply(X = med.time.3 - med.time.2, MARGIN = 2, FUN = quantile, probs = c(.025, .975)))
sink()

